Code borrowed/adapted from Dr. Sara Knox for secondary flux data cleaning (following processing of high frequency data in EddyPro). Original code can be found here.
## Loading required package: ggplot2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
## Warning: package 'readxl' was built under R version 4.0.5
## Warning: package 'REddyProc' was built under R version 4.0.5
## Warning: package 'tidyverse' was built under R version 4.0.5
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v tibble 3.1.7 v dplyr 1.0.9
## v tidyr 1.2.0 v stringr 1.4.0
## v readr 2.1.2 v forcats 0.5.1
## v purrr 0.3.4
## Warning: package 'tidyr' was built under R version 4.0.5
## Warning: package 'readr' was built under R version 4.0.5
## Warning: package 'purrr' was built under R version 4.0.5
## Warning: package 'stringr' was built under R version 4.0.5
## Warning: package 'forcats' was built under R version 4.0.5
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks plotly::filter(), stats::filter()
## x dplyr::lag() masks stats::lag()
## Warning: package 'lubridate' was built under R version 4.0.5
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
# Load "raw" (i.e. processed but not cleaned) data from EddyPro. (Note that this file was manually compiled from multiple months of processed data from EddyPro. Customizable variables at end of full output files were not appended because of mismatch across different months of processed data.)
input<-read.csv("C:/github/MLABcode/data/timeseries_fluxes_usrrc_2022partial.csv", header = TRUE)
input <- input %>%
mutate(DATE = paste(date, time, sep=' '))
input$DATE<-as.POSIXct(input$DATE,format="%m/%d/%Y %H:%M")
# Define new data frame
Data_QAQC<-input
ls(Data_QAQC) # list variables
## [1] "absolute_limits_hf" "air_density"
## [3] "air_heat_capacity" "air_molar_volume"
## [5] "air_pressure" "air_temperature"
## [7] "amplitude_resolution_hf" "attack_angle_hf"
## [9] "aux_in_LI.7200" "bad_aux_tc1_LI.7700"
## [11] "bad_aux_tc2_LI.7700" "bad_aux_tc3_LI.7700"
## [13] "bad_temp_LI.7700" "block_temp_unregulated_LI.7700"
## [15] "bottom_heater_on_LI.7700" "bowen_ratio"
## [17] "box_connected_LI.7700" "calibrating_LI.7700"
## [19] "ch4_def_timelag" "ch4_flux"
## [21] "ch4_mixing_ratio" "ch4_molar_density"
## [23] "ch4_mole_fraction" "ch4_scf"
## [25] "ch4_spikes" "ch4_strg"
## [27] "ch4_time_lag" "ch4_v.adv"
## [29] "ch4_var" "chopper_LI.7200"
## [31] "chopper_LI.7500" "co2_def_timelag"
## [33] "co2_flux" "co2_mixing_ratio"
## [35] "co2_molar_density" "co2_mole_fraction"
## [37] "co2_scf" "co2_signal_strength_7500_mean"
## [39] "co2_spikes" "co2_strg"
## [41] "co2_time_lag" "co2_v.adv"
## [43] "co2_var" "date"
## [45] "DATE" "daytime"
## [47] "delta_p_LI.7200" "detector_LI.7200"
## [49] "detector_LI.7500" "discontinuities_hf"
## [51] "discontinuities_sf" "DOY"
## [53] "drop_out_hf" "e"
## [55] "es" "ET"
## [57] "file_records" "filename"
## [59] "H" "H_scf"
## [61] "H_strg" "h2o_def_timelag"
## [63] "h2o_flux" "h2o_mixing_ratio"
## [65] "h2o_molar_density" "h2o_mole_fraction"
## [67] "h2o_scf" "h2o_spikes"
## [69] "h2o_strg" "h2o_time_lag"
## [71] "h2o_v.adv" "h2o_var"
## [73] "head_detect_LI.7200" "L"
## [75] "laser_temp_unregulated_LI.7700" "LE"
## [77] "LE_scf" "LE_strg"
## [79] "max_wind_speed" "mean_value_LI.7500"
## [81] "mean_value_RSSI_LI.7200" "model"
## [83] "motor_failure_LI.7700" "motor_spinning_LI.7700"
## [85] "no_signal_LI.7700" "non_steady_wind_hf"
## [87] "none_def_timelag" "none_flux"
## [89] "none_mixing_ratio" "none_molar_density"
## [91] "none_mole_fraction" "none_spikes"
## [93] "none_strg" "none_time_lag"
## [95] "none_v.adv" "none_var"
## [97] "not_ready_LI.7700" "pitch"
## [99] "pll_LI.7200" "pll_LI.7500"
## [101] "pump_on_LI.7700" "qc_ch4_flux"
## [103] "qc_co2_flux" "qc_H"
## [105] "qc_h2o_flux" "qc_LE"
## [107] "qc_none_flux" "qc_Tau"
## [109] "rand_err_ch4_flux" "rand_err_co2_flux"
## [111] "rand_err_H" "rand_err_h2o_flux"
## [113] "rand_err_LE" "rand_err_none_flux"
## [115] "rand_err_Tau" "re_unlocked_LI.7700"
## [117] "RH" "roll"
## [119] "rssi_77_mean" "skewness_kurtosis_hf"
## [121] "skewness_kurtosis_sf" "sonic_temperature"
## [123] "specific_humidity" "spikes_hf"
## [125] "sync_LI.7200" "sync_LI.7500"
## [127] "T." "t_in_LI.7200"
## [129] "t_out_LI.7200" "Tau"
## [131] "Tau_scf" "Tdew"
## [133] "time" "timelag_hf"
## [135] "timelag_sf" "TKE"
## [137] "top_heater_on_LI.7700" "ts_spikes"
## [139] "ts_var" "u."
## [141] "u_rot" "u_spikes"
## [143] "u_unrot" "u_var"
## [145] "un_ch4_flux" "un_co2_flux"
## [147] "un_H" "un_h2o_flux"
## [149] "un_LE" "un_none_flux"
## [151] "un_none_scf" "un_Tau"
## [153] "used_records" "v_rot"
## [155] "v_spikes" "v_unrot"
## [157] "v_var" "VPD"
## [159] "w.ch4_cov" "w.co2_cov"
## [161] "w.h2o_cov" "w.none_cov"
## [163] "w.ts_cov" "w_rot"
## [165] "w_spikes" "w_unrot"
## [167] "w_var" "water_vapor_density"
## [169] "wind_dir" "wind_speed"
## [171] "X" "X.1"
## [173] "X.2" "X.3"
## [175] "X.4" "X.z.d..L"
## [177] "x_10." "x_30."
## [179] "x_50." "x_70."
## [181] "x_90." "x_offset"
## [183] "x_peak" "yaw"
## count number of useful flux values (i.e. Foken flag 0 or 1)
high_qual_flux <- input %>%
filter(qc_co2_flux == 0 | qc_co2_flux == 1)
count(high_qual_flux) #5837 of 7061 total flux values can be retained, or 82.7 %
## n
## 1 5837
length(unique(high_qual_flux$date)) # 151 days in date set
## [1] 151
## count values with quality flag of 2 or value of -9999 (based on Mauder & Foken system)
low_qual_flux <- input %>%
filter(qc_co2_flux == 2 | qc_co2_flux == -9999) %>%
summarize(n = n())
low_qual_flux # 1224 of 7061 total flux values need to be discarded
## n
## 1 1224
## count questionable but not terrible fluxes, Foken flag = 1
intermed_qual_flux <- input %>%
filter(qc_co2_flux == 1) %>%
summarize(n = n())
intermed_qual_flux
## n
## 1 2804
# visualize CO2 flux with DOY as the x variable (i.e. time)
plot1 <- high_qual_flux %>%
filter(co2_flux != -9999) %>%
ggplot(aes(x = DOY, y = co2_flux)) +
geom_point()
plot1
plot2 <- input %>%
filter(co2_flux != -9999,
co2_flux < 10,
co2_flux > -10) %>%
ggplot(aes(x = DOY, y = co2_flux)) +
geom_point()
plot2
# now methane flux
plotm1 <- input %>%
filter(ch4_flux != -9999) %>%
ggplot(aes(x = DOY, y = ch4_flux)) +
geom_point()
plotm1
# h20 flux
ploth1 <- input %>%
filter(h2o_flux != -9999) %>%
ggplot(aes(x = DOY, y = h2o_flux)) +
geom_point()
ploth1
ploth2 <- input %>%
filter(h2o_flux != -9999,
h2o_flux < 15,
h2o_flux > -5) %>%
ggplot(aes(x = DOY, y = h2o_flux)) +
geom_point()
ploth2
Apply some maximum and minimum values for data screening. Most of these have been determined empirically based on very limited data available at US-RRC in 2022. Note that all will need to be revisited and perhaps altered prior to “official” analysis for publication.
# pitch_max <- 7 #[deg] ## hmmmm....ask about this, our range is huge. -62.08 to 77.27
# pitch_min <- -7 #[deg]
# WD -> ADD LATER!
qc_flag <- 1 #[#]
w.ts_cov_max <- 0.5 #[m+1K+1s-1] ## our range is -0.196 to 0.247 ## CHECK for final
# ts_var_max <- 3 #[K+2] ## our range is 0.00219 to 13.324 ## CHECK for final
# mean_value_RSSI_LI.7200_min <- 50 ## ALL our values are -9999, we have no 7200 installed
mean_value_LI.7500_min <- 50 ## our range is 13 to 100 ## CHECK for final
h2o_flux_min <- -15 #[mmol+1s-1m-2] # our actual minimum is -23 or so, but -15 looks reasonable ## CHECK for final
h2o_flux_max <- 25 #[mmol+1s-1m-2] # our max is 25.493 ## CHECK for final
# h2o_var_max <- 9000 # our highest value is 345313, min is -9999 ## CHECK for final
# h2o_mean_max <- 2000 #[mmol/m3] ## CHECK for final
# co2_mean_min <- 0 #[mmol/m3] ## CHECK for final
# co2_var_max <- 0.2 # our range is 0.0000773 to 72.805 in the high qual fluxes. CHECK for final
co2_flux_max <- 100 #[µmol+1s-1m-2] ## eyeballing this! ## CHECK for final
co2_flux_min <- -75 #[µmol+1s-1m-2] ## eyeballing this! ## CHECK for final
rssi_77_mean_min <- 20 #[#] this looks pretty standard, don't expect to change it later
# ch4_mean_min <- 1.7 # [ppm] ## CHECK for final
# ch4_var_max <- 0.0005 ## our range is -9.99e+03 to 3.76 e-03 ## CHECK for final
ch4_flux_min <- -0.5 #[µmol+1s-1m-2] ## our range is -2 to 2, roughly. eyeballing this ## CHECK for final
ch4_flux_max <- 1.5 #[µmol+1s-1m-2] ## CHECK for final
ustar_max <- 1.2 # our range is 0.0056 to 0.8382 ## CHECK for final
wind_max <- 360 #degrees ## eyeballing from map ## CHECK for final
wind_min <- 100 #degrees ## eyeballing from map ## CHECK for final
# L_max <- 10000 # [m] - added 13/08/21 - D. Ng ## not sure what these are ## CHECK for final
# L_min <- -10000 # [m] - added 13/08/21 - D. Ng ## not sure what these are ## CHECK for final
# Plot filtering variable of interest to check values
plot_ly(data = Data_QAQC, x = ~DATE, y = ~co2_var, name = 'pitch', type = 'scatter', mode = 'line') %>%
layout(yaxis = list(range = c(-1, 1))) %>%
plotly::toWebGL()
## Warning: `arrange_()` was deprecated in dplyr 0.7.0.
## Please use `arrange()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
Data_QAQC$badt <- ifelse(abs(input$w.ts_cov) > w.ts_cov_max | input$qc_H > qc_flag,1,0)
Data_QAQC$badq <- ifelse(input$h2o_flux < h2o_flux_min | input$h2o_flux > h2o_flux_max | input$co2_signal_strength_7500_mean < mean_value_LI.7500_min | input$qc_LE > qc_flag,1,0)
Data_QAQC$badc <- ifelse(input$co2_flux < co2_flux_min | input$co2_flux > co2_flux_max | input$co2_signal_strength_7500_mean < mean_value_LI.7500_min | input$qc_co2_flux > qc_flag,1,0)
Data_QAQC$badm <- ifelse(input$ch4_flux < ch4_flux_min | input$ch4_flux > ch4_flux_max | input$rssi_77_mean < rssi_77_mean_min | input$qc_ch4_flux > qc_flag,1,0)
Data_QAQC$badwind <- ifelse(input$wind_dir > wind_max | input$wind_dir < wind_min,1,0)